function [betac_dist,betac_grid,steinberg_moments_data] = data_load_steinberg(number_grid,number_educ,data_dir,paper_dir)

disp('Running script to load and transform Steinberg data...')

data = readtable([data_dir,'steinberg_data_nomiss.csv']);
data = table2array(data);

cd(paper_dir)

%  Variables in data set from Larry Steinberg:
% 1) ID
% 2) AGE
% 3) RACE
% 4) AFRAM
% 5) ASIAN
% 6) WHITE
% 7) LATINA
% 8) SES
% 9) MALE
% 10) IQ
% 11) TIPPOINT
% 12) DISCOUNT
% 13) LOGK
% 14) TIPPOIN0
% 15) TIPPOIN1
% 16) TIPPOIN2
% 17) TIPPOIN3
% 18) TIPPOIN4
% 19) TIPPOIN5

% INFO FROM STEINBERG:
% SES was coded as follows:
%Grade 1 through 12 reported in double digits from 01 to 12,
%13 = some college,
%14 = college degree,
%15 = education beyond college

% Load data on various tipping (indifference) points and covariates:
tippoin0 = data(:,14);
tippoin1 = data(:,15);
tippoin2 = data(:,16);
tippoin3 = data(:,17);
tippoin4 = data(:,18);
tippoin5 = data(:,19);
iq = data(:,10);
male = data(:,9);
ses = data(:,8);
age = data(:,2);

% Note: the variables 'tippoin0' through 'tippoin5' ask respondents about a
% 1 day/1 week/1 month/3 month/6 month/12 month delay, where the delayed
% reward is kept fixed to $1000, and the tippoint is defined as the
% indifference point (between 0 and 1000).

% See also table 2 on p36 in Steinberg paper to cross-reference.

Nobs=size(data,1); % number of individuals

% Define age categories:
agecat = zeros(Nobs,1);
agecat((age>= 10 & age <= 11)==1) = 1;
agecat((age>= 12 & age <= 13)==1) = 2;
agecat((age>= 14 & age <= 15)==1) = 3;
agecat((age>= 16 & age <= 17)==1) = 4;
agecat((age>= 18 & age <= 21)==1) = 5;
agecat((age>= 22 & age <= 25)==1) = 6;
agecat((age>= 26 & age <= 30)==1) = 7;

% We elicit discount factors by assuming log-utility and a quasi-hyperbolic 
% discounting function to get rid of the present bias (see also paper Appendix).

betas = nan(Nobs,1);
betas(:,1) = (log(tippoin5)./log(tippoin4)).^2;

tmpgt1 = tippoin5 > tippoin4; % irrational - drop these
betas(tmpgt1==1,1) = NaN;

% Plot average annual discount factor by age group (smooth out noise)
agevec1 = [10:2:28];
agevec2 = [11:2:27 30];
beta_avg = NaN(length(agevec1),2);
for ii= 1:length(agevec1)
    tmprows = (age>=agevec1(ii) & age <= agevec2(ii));
    beta_avg(ii,1) = nanmean(betas(tmprows));
    beta_avg(ii,2) = prctile(betas(tmprows),50);
    beta_avg(ii,3) = prctile(betas(tmprows),25);
    beta_avg(ii,4) = prctile(betas(tmprows),75);
end

f2 = figure;
set(f2,'visible','off');
plot(agevec1,beta_avg(:,1),'k-o',agevec1,beta_avg(:,3),'k--',agevec1,beta_avg(:,2),'k-d',agevec1,beta_avg(:,4),'k-.');
xlabel('Age'); ylabel('Annual discount factor \beta'); %xlim([0 1.05]);
h = legend('Mean','Q1','Median','Q3','Location','southoutside');
saveas(f2,'fig_steinberg_dist_beta_byage', 'png')
saveas(f2,'fig_steinberg_dist_beta_byage', 'eps')
close all; % close figures

% 5 cat: ( all ages)
beg_age = [10 13 18 22 26 10];
end_age = [12 17 21 25 30 30];

% Consider 3 education categories: 1) HS or less (SES = 0-12), 2) some college or college (SES = 13-14), and 3) graduate (SES = 15)
beg_educ = [0 13 15 0]; % min(ses) = 0
end_educ = [12 14 15 15]; % max(ses) = 15 = graduate

beta_avg = NaN(length(beg_age),6,length(beg_educ));

for ii= 1:length(beg_age)
    for jj = 1:length(beg_educ)
        row_ind = (age >= beg_age(ii) & age <= end_age(ii) & ses >= beg_educ(jj) & ses <= end_educ(jj));
        beta_avg(ii,1,jj) = nanmean(betas(row_ind));
        beta_avg(ii,2,jj) = nanmedian(betas(row_ind));
        beta_avg(ii,3,jj) = nanstd(betas(row_ind));
        beta_avg(ii,4,jj) = prctile(betas(row_ind),25);
        beta_avg(ii,5,jj) = prctile(betas(row_ind),75);
        beta_avg(ii,6,jj) = sum(~isnan(betas(row_ind)));
    end
end

tmpp = 1:2; % second figure: only plot youngest two age categories

% Plot average child discount factors by age and parental SES
f2 = figure;
set(f2,'visible','off');
plot(beg_age(tmpp),beta_avg(tmpp,1,3),'k-',...
    beg_age(tmpp),beta_avg(tmpp,1,2),'k-.',...
    beg_age(tmpp),beta_avg(tmpp,1,1),'k--');
xlabel('Child Age'); ylabel('Annual discount factor \beta'); %xlim([0 1.05]);
xlim([min(beg_age(tmpp))-0.5 max(beg_age(tmpp))+0.5]);
ylim([0.6 0.85]);
set(gca,'XTick',beg_age(tmpp))
h = legend('Parent educ. - Graduate','Parent educ. - Some college or College','Parent educ. - High school or less','Location','southoutside');
set(gca,'XTickLabel',char('10-12','13-17'))
set(h, 'interpreter','latex');
saveas(f2,'fig_steinberg_beta_byageSES_means', 'png')
saveas(f2,'fig_steinberg_beta_byageSES_means', 'eps')
close all; % close figures

%% Calculate grid based on subsample of ages 10-17

% Delete all betas for ages 18+, i.e. agecat 5,6,7
cutoffcat = 5; % only keep ages 10-17 (or age categories 1-4)

% Generate schooling variables
% hs or less
hs = nan(Nobs,1);
hs(ses<=12) = 1;
hs(ses>12) = 0;

% some college
sc = nan(Nobs,1);
sc(ses == 13) = 1;
sc(ses ~= 13) = 0;

% college
coll = nan(Nobs,1);
coll(ses == 14) = 1;
coll(ses ~= 14) = 0;

% graduate
grad = nan(Nobs,1);
grad(ses == 15) = 1;
grad(ses < 15) = 0;

betas(agecat>=cutoffcat) = NaN;
hs(agecat>=cutoffcat) = NaN;
sc(agecat>=cutoffcat) = NaN;
coll(agecat>=cutoffcat) = NaN;
grad(agecat>=cutoffcat) = NaN;
age(agecat>=cutoffcat) = NaN;

% Now define conditional distributions from which to draw the child's
% random initial discount factor, conditional on IQ tercile (and ignoring
% initial age in 1997 for now)

% Define grid points based on unconditional sample
% number_grid represents number of discrete support points, so to get
% 3 grid points, we need the 33/67 percentiles, and then define the
% grid points as the average beta within each window

betas_quantiles = linspace(0,1,number_grid+1);
cutpoints = [0 quantile(betas,betas_quantiles(2:end-1)) 1];

% Give error warning if we have some repeated grid points (due to too much
% mass being concentrated there)
if length(unique(cutpoints)) < length(cutpoints)
    error('Grid points for beta_c are not unique!')
end

% % % disp('distribution by beta bin, all educ cat')

Ntot = sum(~isnan(betas));
% define first interval as [0,cutpoint1)
% define all middle intervals as [cutpoint_i,cutpoint_i+1)
% define last interval as [cutpointN,1]
betac_dist_uncond = nan(number_grid,1);
for bb=2:length(cutpoints)
    if bb == length(cutpoints) % last interval
        % disp(['Pr(',num2str(cutpoints(bb-1)),' <= beta <= ',num2str(cutpoints(bb)),') = '])
        betac_dist_uncond(bb-1) = nansum(betas>=cutpoints(bb-1)& betas<=cutpoints(bb))/Ntot;
    else % all other intervals
        % disp(['Pr(',num2str(cutpoints(bb-1)),' <= beta < ',num2str(cutpoints(bb)),') = '])
        betac_dist_uncond(bb-1) = nansum(betas>=cutpoints(bb-1)& betas<cutpoints(bb))/Ntot;
    end
end

% Define discretized grid

betac_grid = nan(number_grid,1);
for bb=2:length(cutpoints)
    if bb == length(cutpoints) % last interval
        betac_grid(bb-1) = mean(betas(betas>=cutpoints(bb-1)& betas<=cutpoints(bb)));
    else
        betac_grid(bb-1) = mean(betas(betas>=cutpoints(bb-1)& betas<cutpoints(bb)));
    end
end
% disp('The obtained grid of child discount factors is:')
% betac_grid

% Check the approximation
% disp('Approximation using grid of betas:')
% sum(betac_grid.*betac_dist_uncond)
% disp('Average of actual betas:')
% nanmean(betas)

% define prob distributions based on number of educational types we want
betac_dist = nan(number_grid,number_educ); % beta levels x education levels
tmpp = NaN(size(betas,1),number_educ);

% Default: assume 3 education categories
tmpp(:,1) = hs; % hs or less
tmpp(:,2) = sc+coll; % some college
tmpp(:,3) = grad; % college or more

% Calculate data moments to put in paper

age1 = [10 10 13];
age2 = [17 12 17];

steinberg_moments_data = NaN(length(age1)*(number_educ+1), 2); % rows = by age and SES cat / all hh, columns = mean/stdev

% top 4 rows = all ages, next 4 rows = age 10-12, next 4 rows = age 13-17

for ii = 1:length(age1)
    for j = 1:number_educ
        tmprows = age >= age1(ii) & age <= age2(ii) & tmpp(:,j)==1;
        Ntot = sum(~isnan(betas(tmprows)));
        steinberg_moments_data((ii-1)*(number_educ+1)+j,:) = [nanmean(betas(tmprows)) nanstd(betas(tmprows))];
        if ii==1 % base betac_dist on entire age group 10-17
            for bb=2:length(cutpoints)
                if bb == length(cutpoints) % last interval
                    betac_dist(bb-1,j) = nansum(betas(tmprows)>=cutpoints(bb-1)& betas(tmprows)<=cutpoints(bb))/Ntot;
                else
                    betac_dist(bb-1,j) = nansum(betas(tmprows)>=cutpoints(bb-1)& betas(tmprows)<cutpoints(bb))/Ntot;
                end
            end
        end
        % % %     disp(['Note: Steinberg sample size for educ group ',num2str(j),' = ',num2str(Ntot)]);
    end
    % now for all hh within the age category (averaging across all educ)
    tmprows = age >= age1(ii) & age <= age2(ii);
    steinberg_moments_data((ii-1)*(number_educ+1)+number_educ+1,:) = [nanmean(betas(tmprows)) nanstd(betas(tmprows))];
end

end % function